*** Code for "The Labor Market Integration of Refugee Migrants in High-Income Countries"
*** Courtney Brell, Christian Dustmann, and Ian Preston
***
*** Analysis of the American Community Survey
*** This file should be run in the folder containing the IPUMS 2017 5-year ACS data (loaded below as "IPUMS_2017_5year")
*** Also requires US-YISrefugees.dta to be placed in the same folder
***

********************************************************************************
*** Preliminaries

clear all

*** Identify refugee country-year pairs
* Output: "refugee_countryyearpairs.dta", one row per (bpld, yrimmig) with a 0/1
* flag refugee_countryyearpair marking cells where refugees make up at least 70%
* of the weighted ACS immigrant count.
use "IPUMS_2017_5year", clear

* Keep only non-natives migrating in the period we have YIS data for (up to 2016)
keep if bpl>=150 & bpl<900
keep if yrimmig>=1990 & yrimmig<=2016

* Calculate immigration numbers by year and country:
*   totalimmigrants = count weighted by perwt, rawimmigrants = unweighted count
gen dummy=1
collapse (sum) totalimmigrants=dummy (rawsum) rawimmigrants=dummy [fw=perwt], by(yrimmig bpld)

* Go wide (one row per bpld) so the refugee file can be matched 1:1 on bpld
reshape wide totalimmigrants rawimmigrants, i(bpld) j(yrimmig)

* Merge in refugee numbers from the Yearbook of Immigration Statistics (restricted to country of origin-year pairs that contain at least 50 refugees)
* keep(match) retains only birthplaces present in both sources
merge 1:1 bpld using "US-YISrefugees", keep(match) nogenerate

* Back to long form; the refugee counts are stacked from the ref* columns
reshape long totalimmigrants rawimmigrants ref, i(bpld) j(yrimmig)
rename ref refugees

* Calculate refugee/immigrant ratio
* Stored as double on purpose: with the default float storage a ratio of exactly
* 0.7 is held as roughly 0.69999999 and would fail the >=0.7 test below, dropping
* a cell that sits precisely on the threshold.
gen double refugeeratio=refugees/totalimmigrants

* Pick out refugee country-year pairs (cells with a missing ratio are flagged 0)
gen refugee_countryyearpair=(refugeeratio>=0.7 & !missing(refugeeratio))

keep bpld yrimmig refugee_countryyearpair
save "refugee_countryyearpairs", replace

*** Set up sample
use "IPUMS_2017_5year", clear

* Demographic variables
* countryoforigin starts from the detailed birthplace code bpld: every code below
* 15000 is pooled into the single value 1, codes from 90000 upwards (and missing
* bpld) become missing, and everything in between keeps its bpld code.
gen countryoforigin=cond(bpld<15000, 1, cond(bpld>=90000, ., bpld))
label values countryoforigin BPLD
* female is 1 when sex==2, 0 when sex==1, and missing for any other value
gen female=cond(sex==2, 1, cond(sex==1, 0, .))

* Timing variables: arrival year, interview year, and years since arrival
gen yeararrived=yrimmig
gen yearofinterview=multyear
gen yearssincearrive=yrsusa1

* Identify migrant types
* Attach the refugee flag by birthplace and arrival year. The pairs file can hold
* country-year cells that match nobody in the sample (a by-product of the reshaping
* used to build it), so rows found only in the using file are not kept.
merge m:1 bpld yrimmig using "refugee_countryyearpairs", keep(master match) nogenerate

* Condition shared by both migrant groups: origin is not the pooled category 1
* and a non-zero, non-missing arrival year is recorded
local arrivedfromabroad countryoforigin!=1 & yeararrived!=0 & !missing(yeararrived)
gen native=1 if countryoforigin==1 & yeararrived==0
gen refugee=(`arrivedfromabroad' & refugee_countryyearpair==1)
gen immigrant=(`arrivedfromabroad' & refugee!=1)

* Single categorical code; the nesting order mirrors "later assignment wins"
* (immigrant over refugee over native)
gen migranttype=cond(immigrant==1, 2, cond(refugee==1, 1, cond(native==1, 0, .)))
label define Lmigrant 0 "Native" 1 "Refugee" 2 "Other immigrant"
label values migranttype Lmigrant
* Natives are assigned zero years since arrival
replace yearssincearrive=0 if migranttype==0

* Choose our sample: classified individuals aged 20-64 with 0-20 years since arrival
* (the window stays at 20 years for now because the language plots use it)
keep if !missing(migranttype) & inrange(age,20,64) & inrange(yearssincearrive,0,20)

* Use survey weights
gen indweight=perwt

********************************************************************************
*** Calculate labor market outcomes

* Employment: 1 when empstat==1, 0 when empstat is 2 or 3, missing otherwise
gen employment=cond(empstat==1, 1, cond(inlist(empstat,2,3), 0, .))

* Wages
* Annual wage censored at 99.5th state percentile, topcoded by mean above censor limit
* Built in one step: only positive incwage is used, the codes 999998/999999 and
* anyone with employment==0 are left missing, and the annual figure is scaled by
* 7/365.2425 to convert it to a weekly wage.
gen wage=incwage*7/365.2425 if incwage>0 & employment!=0 & !inlist(incwage,999998,999999)

* Indicators of nonmissing outcomes; their raw sums below give observation counts
gen Nemp=(employment<.)
gen Ninc=(wage<.)

* Statistics requested in both collapses: weighted means of the outcomes and
* unweighted counts of nonmissing observations
local cellstats (mean) employment avg_income=wage (rawsum) Nemp Ninc [aw=indweight]

* By gender, years since arrival and migrant type
preserve
	collapse `cellstats', by(female yearssincearrive migranttype)
	save "US-ACS", replace
restore

* Pooled over gender (female is missing on these rows), then stacked on top of
* the by-gender cells and written back to the same file
preserve
	collapse `cellstats', by(yearssincearrive migranttype)
	append using "US-ACS"
	order yearssincearrive migranttype female employment Nemp avg_income Ninc
	sort migranttype female yearssincearrive
	save "US-ACS", replace
restore

********************************************************************************
*** Calculate sample descriptives

* One pass per migrant type code (0, 1, 2). Each pass works on a temporary copy of
* the data limited to that type and to at most 10 years since arrival, prints the
* descriptives to the log, and then restores the full sample.
forvalues mig=0/2 {
preserve
	disp `mig'
	keep if migranttype==`mig' & yearssincearrive<=10

	* # Observations
	count

	* Gender
	sum female [aw=indweight]

	* Age
	sum age [aw=indweight], detail

	* Time since arrival
	sum yearssincearrive [aw=indweight], detail

	* Age at arrival
	gen age_at_arrival=age-yearssincearrive
	sum age_at_arrival [aw=indweight], detail

	* Year of arrival
	gen year_of_arrival=yearofinterview-yearssincearrive
	sum year_of_arrival [aw=indweight], detail

	* LM outcomes: nonmissing count followed by the weighted distribution
	foreach outcome of varlist employment wage {
		count if !missing(`outcome')
		sum `outcome' [aw=indweight], detail
	}

	* Country of origin: weighted share of each origin within this migrant type,
	* listing the ten largest
	capture gen dummy=1
	collapse (sum) originweight=dummy [iw=indweight], by(countryoforigin)
	egen allweight=total(originweight)
	gen fracrefugees=originweight/allweight
	gsort -fracrefugees
	list countryoforigin fracrefugees if _n<=10
restore
}

********************************************************************************
*** Generate language plots

preserve
	* Natives (migranttype 0) are left out of the language analysis
	drop if migranttype==0

	* Language outcomes
	* englishprof flags speakeng codes 3, 4 and 5 (see the IPUMS codebook for the
	* meaning of each code); any other value, including missing, gives 0
	gen englishprof=inlist(speakeng,3,4,5)
	* lingisol: code 0 becomes missing and the remaining codes are shifted down by one
	replace lingisol=cond(lingisol==0, ., lingisol-1)

	* Weighted means by years since arrival and migrant type, plus raw cell counts
	capture gen dummy=1
	collapse (mean) englishprof lingisol (rawsum) N=dummy [aw=indweight], by(yearssincearrive migranttype)

	* Plot results: one connected line per migrant group, saved as Stata graph files
	local xaxis xtitle("Years since migration")

	twoway ///
		(connected englishprof yearssincearrive if migranttype==1, sort) ///
		(connected englishprof yearssincearrive if migranttype==2, sort) ///
		, `xaxis' ytitle("Proportion who speak English well") ///
		legend(order(1 "Refugee" 2 "Other immigrant"))
	graph save "US_ACS_lang", replace

	twoway ///
		(connected lingisol yearssincearrive if migranttype==1, sort) ///
		(connected lingisol yearssincearrive if migranttype==2, sort) ///
		, `xaxis' ytitle("Rate of linguistic isolation") ///
		legend(order(1 "Refugee" 2 "Other Immigrant"))
	graph save "US_ACS_lingisol", replace
restore

********************************************************************************
*** Clean up

* Delete the intermediate country-year pair file saved earlier in this script.
* -capture noisily- still displays any error from erase (for example if the file
* is absent) but lets the do-file continue instead of stopping.
capture noisily erase "refugee_countryyearpairs.dta"
* Finish by clearing the data held in memory
clear all
